#!/usr/bin/env python3
"""
Phase 6: Referee R1 Revisions — Paper A (Twin Deficits)

Tests:
  A. Winsorization robustness for CA decomposition (p1/p99, p5/p95)
  B. Trade share vs aging — demonstrate channel mismatch mechanism
  C. Fiscal × trade-dominance interaction
  D. KAOPEN restoration probe — 2×3 factorial + within-tercile tests

Output: extensions/output/tables/phase6_*.md
"""

import pandas as pd
import numpy as np
from pathlib import Path
import sys

# ── Paths ──────────────────────────────────────────────────────────────────
PROJECT_DIR = Path(__file__).resolve().parent.parent
ROOT_DIR = PROJECT_DIR.parent
MULTILATERAL_DIR = ROOT_DIR / "multilateral"

OUT_TABLES = PROJECT_DIR / "output" / "tables"
OUT_TABLES.mkdir(parents=True, exist_ok=True)

sys.path.insert(0, str(MULTILATERAL_DIR / "src"))
from model import PanelGLS


def load_panel():
    path = PROJECT_DIR / "data" / "processed" / "extensions_panel.csv"
    df = pd.read_csv(path)
    df = df[df["year"] <= 2024].copy()
    return df


def stars(p):
    if p < 0.01:
        return "***"
    elif p < 0.05:
        return "**"
    elif p < 0.1:
        return "*"
    return ""


def run_gls(df, dep_var, regressors, label):
    """Run PanelGLS and return results dict."""
    cols = [dep_var] + regressors
    sub = df.dropna(subset=cols).copy()
    if len(sub) < 50:
        print(f"  SKIP {label}: only {len(sub)} obs")
        return None

    model = PanelGLS()
    model.fit(sub[dep_var].values, sub[regressors].values,
              sub["iso3"].values, sub["year"].values)
    print(f"\n  {label}  (N={model.n_obs}, C={model.n_countries}, R²={model.r_squared:.3f})")

    res = model.to_dataframe(feature_names=regressors)
    res["model"] = label
    res["dep_var"] = dep_var
    res["n_obs"] = model.n_obs
    res["n_countries"] = model.n_countries
    res["r_squared"] = model.r_squared
    res["rho"] = model.rho
    return res


def get_coef(res, var):
    """Extract coefficient and p-value for a variable from results df."""
    if res is None:
        return None, None
    row = res[res["variable"] == var]
    if len(row) == 0:
        return None, None
    return row.iloc[0]["coefficient"], row.iloc[0]["p_value"]


def winsorize(s, lower, upper):
    """Winsorize a series at given percentiles."""
    lo = s.quantile(lower / 100)
    hi = s.quantile(upper / 100)
    return s.clip(lo, hi)


# ═══════════════════════════════════════════════════════════════════════════
# Part A: Winsorization Robustness
# ═══════════════════════════════════════════════════════════════════════════

def part_a_winsorization(df):
    """Re-run CA decomposition with winsorized trade/income balance."""
    print("\n" + "=" * 70)
    print("PART A: Winsorization Robustness for CA Decomposition")
    print("=" * 70)

    demo_vars = ["Z_1", "Z_2", "Z_3"]
    eba_controls = ["fiscal_bal_gdp", "nfa_gdp_lag", "log_rel_opw", "kaopen"]
    eba_controls = [c for c in eba_controls if c in df.columns
                    and df[c].notna().sum() > 200]
    base_vars = demo_vars + eba_controls

    results = []

    for win_label, lo, hi in [("Raw", 0, 100), ("p1/p99", 1, 99), ("p5/p95", 5, 95)]:
        dfw = df.copy()
        if lo > 0:
            for v in ["trade_balance_gdp", "income_balance_gdp"]:
                if v in dfw.columns:
                    dfw[v] = winsorize(dfw[v].dropna(), lo, hi)
                    # Re-align after winsorization
                    dfw.loc[dfw[v].isna(), v] = np.nan

        # Z → trade_balance
        r = run_gls(dfw, "trade_balance_gdp", base_vars,
                     f"Z → Trade ({win_label})")
        if r is not None:
            results.append(r)

        # Z → income_balance (non-trade residual)
        r = run_gls(dfw, "income_balance_gdp", base_vars,
                     f"Z → NonTrade ({win_label})")
        if r is not None:
            results.append(r)

        # OADR → trade_balance
        oadr_vars = ["old_dep"] + [c for c in eba_controls if c != "kaopen"]
        r = run_gls(dfw, "trade_balance_gdp", oadr_vars,
                     f"OADR → Trade ({win_label})")
        if r is not None:
            results.append(r)

    # Write output
    lines = ["# Part A: Winsorization Robustness — CA Decomposition", ""]
    lines.append("Tests whether the Z→non-trade / Z→trade null pattern is robust to ")
    lines.append("symmetric winsorization at p1/p99 and p5/p95.")
    lines.append("")

    if results:
        rdf = pd.concat(results, ignore_index=True)

        # Summary table: Z₁ coefficients across winsorization levels
        lines.append("**Table R1: Z₁ and OADR Coefficients Under Winsorization**")
        lines.append("")
        lines.append("| Specification | Dep Var | Winsorization | Z₁/OADR Coef | SE | p-value | N | R² |")
        lines.append("|---------------|---------|---------------|-----------|----|---------|----|-----|")
        for m in rdf["model"].unique():
            sub = rdf[rdf["model"] == m]
            row0 = sub.iloc[0]
            # Get the key variable
            for kv in ["Z_1", "old_dep"]:
                kvrow = sub[sub["variable"] == kv]
                if len(kvrow) > 0:
                    kvrow = kvrow.iloc[0]
                    sig = stars(kvrow["p_value"])
                    lines.append(
                        f"| {m} | {row0['dep_var']} | - | "
                        f"{kvrow['coefficient']:.1f}{sig} | "
                        f"{kvrow['std_error']:.1f} | {kvrow['p_value']:.3f} | "
                        f"{row0['n_obs']:,} | {row0['r_squared']:.3f} |")

    md_path = OUT_TABLES / "phase6_winsorization.md"
    md_path.write_text("\n".join(lines))
    print(f"\n  Output: {md_path}")
    return results


# ═══════════════════════════════════════════════════════════════════════════
# Part B: Trade Share vs Aging — Demonstrate Channel Mismatch
# ═══════════════════════════════════════════════════════════════════════════

def part_b_trade_share(df):
    """Show that trade share of CA declines with aging."""
    print("\n" + "=" * 70)
    print("PART B: Trade Share vs Aging (Channel Mismatch Mechanism)")
    print("=" * 70)

    dfw = df.copy()

    # Compute trade share = |trade_bal| / (|trade_bal| + |nontrade|)
    dfw = dfw.dropna(subset=["trade_balance_gdp", "income_balance_gdp", "old_dep"]).copy()
    dfw["abs_trade"] = dfw["trade_balance_gdp"].abs()
    dfw["abs_nontrade"] = dfw["income_balance_gdp"].abs()
    dfw["abs_total"] = dfw["abs_trade"] + dfw["abs_nontrade"]
    dfw["trade_share"] = dfw["abs_trade"] / dfw["abs_total"].clip(lower=0.01)
    # Cap at 1 for numerical safety
    dfw["trade_share"] = dfw["trade_share"].clip(0, 1)

    print(f"  Trade share: mean={dfw['trade_share'].mean():.3f}, "
          f"median={dfw['trade_share'].median():.3f}")

    results = []

    # Test 1: Regress trade_share on OADR with FE+year
    controls = ["nfa_gdp_lag", "log_rel_opw", "kaopen"]
    controls = [c for c in controls if c in dfw.columns and dfw[c].notna().sum() > 200]

    r = run_gls(dfw, "trade_share", ["old_dep"] + controls,
                 "B1: OADR → Trade Share")
    if r is not None:
        results.append(r)

    # Test 2: Z vector on trade share
    demo_vars = ["Z_1", "Z_2", "Z_3"]
    r = run_gls(dfw, "trade_share", demo_vars + controls,
                 "B2: Z → Trade Share")
    if r is not None:
        results.append(r)

    # Test 3: Tercile means
    tercile_stats = []
    for t in sorted(dfw["demo_tercile"].dropna().unique()):
        sub = dfw[dfw["demo_tercile"] == t]
        tercile_stats.append({
            "Tercile": int(t),
            "Label": {1: "Early (young)", 2: "Mid", 3: "Late (old)"}[int(t)],
            "N": len(sub),
            "Countries": sub["iso3"].nunique(),
            "Mean Trade Share": sub["trade_share"].mean(),
            "Mean |Trade/GDP|": sub["abs_trade"].mean(),
            "Mean |NonTrade/GDP|": sub["abs_nontrade"].mean(),
            "Mean OADR": sub["old_dep"].mean(),
        })

    # Write output
    lines = ["# Part B: Trade Share vs Aging — Channel Mismatch Mechanism", ""]
    lines.append("Referee §3.3: Directly demonstrates that the share of CA accounted for by trade")
    lines.append("declines with aging, establishing the mechanism by which fiscal leverage erodes.")
    lines.append("")

    if tercile_stats:
        lines.append("**Table R2: CA Composition by Demographic Tercile**")
        lines.append("")
        lines.append("| Tercile | Label | N | Countries | Mean OADR | Trade Share | |Trade/GDP| | |NonTrade/GDP| |")
        lines.append("|---------|-------|---|-----------|-----------|-------------|-----------|--------------|")
        for ts in tercile_stats:
            lines.append(
                f"| {ts['Tercile']} | {ts['Label']} | {ts['N']:,} | "
                f"{ts['Countries']} | {ts['Mean OADR']:.1f}% | "
                f"{ts['Mean Trade Share']:.3f} | "
                f"{ts['Mean |Trade/GDP|']:.2f} | "
                f"{ts['Mean |NonTrade/GDP|']:.2f} |")

    lines.append("")

    if results:
        rdf = pd.concat(results, ignore_index=True)
        lines.append("**Table R3: Regression — Trade Share on Aging**")
        lines.append("")
        lines.append("| Specification | Key Variable | Coef | SE | p-value | N | R² |")
        lines.append("|---------------|-------------|------|----|---------|---|-----|")
        for m in rdf["model"].unique():
            sub = rdf[rdf["model"] == m]
            row0 = sub.iloc[0]
            for kv in ["old_dep", "Z_1"]:
                kvrow = sub[sub["variable"] == kv]
                if len(kvrow) > 0:
                    kvrow = kvrow.iloc[0]
                    sig = stars(kvrow["p_value"])
                    lines.append(
                        f"| {m} | {kv} | "
                        f"{kvrow['coefficient']:.4f}{sig} | "
                        f"{kvrow['std_error']:.4f} | {kvrow['p_value']:.3f} | "
                        f"{row0['n_obs']:,} | {row0['r_squared']:.3f} |")

    lines.append("")
    lines.append("*Notes: Trade share = |Trade Balance/GDP| / (|Trade Balance/GDP| + |Non-Trade Residual/GDP|). "
                 "Panel GLS with entity and year FE, AR(1) correction.*")

    md_path = OUT_TABLES / "phase6_trade_share.md"
    md_path.write_text("\n".join(lines))
    print(f"\n  Output: {md_path}")
    return results, tercile_stats


# ═══════════════════════════════════════════════════════════════════════════
# Part C: Fiscal × Trade-Dominance Interaction
# ═══════════════════════════════════════════════════════════════════════════

def part_c_fiscal_trade_dominance(df):
    """Show fiscal coefficient is larger when trade dominates CA."""
    print("\n" + "=" * 70)
    print("PART C: Fiscal × Trade-Dominance Interaction")
    print("=" * 70)

    dfw = df.copy()
    dfw = dfw.dropna(subset=["trade_balance_gdp", "income_balance_gdp",
                              "fiscal_bal_gdp", "ca_gdp"]).copy()

    # Compute trade share
    dfw["abs_trade"] = dfw["trade_balance_gdp"].abs()
    dfw["abs_nontrade"] = dfw["income_balance_gdp"].abs()
    dfw["abs_total"] = dfw["abs_trade"] + dfw["abs_nontrade"]
    dfw["trade_share"] = (dfw["abs_trade"] / dfw["abs_total"].clip(lower=0.01)).clip(0, 1)

    # Trade dominance indicator: 1 if trade share above country-year median
    median_ts = dfw["trade_share"].median()
    dfw["trade_dominant"] = (dfw["trade_share"] > median_ts).astype(float)
    dfw["fiscal_x_trade_dom"] = dfw["fiscal_bal_gdp"] * dfw["trade_dominant"]

    # Also continuous version
    dfw["fiscal_x_trade_share"] = dfw["fiscal_bal_gdp"] * dfw["trade_share"]

    demo_vars = ["Z_1", "Z_2", "Z_3"]
    non_fiscal = ["nfa_gdp_lag", "log_rel_opw", "kaopen"]
    non_fiscal = [c for c in non_fiscal if c in dfw.columns and dfw[c].notna().sum() > 200]

    results = []

    # C1: Baseline twin deficit
    r = run_gls(dfw, "ca_gdp", ["fiscal_bal_gdp"] + non_fiscal + demo_vars,
                 "C1: Baseline twin deficit")
    if r is not None:
        results.append(r)

    # C2: fiscal × trade_dominant indicator
    r = run_gls(dfw, "ca_gdp",
                 ["fiscal_bal_gdp", "trade_dominant", "fiscal_x_trade_dom"]
                 + non_fiscal + demo_vars,
                 "C2: fiscal × trade_dominant (indicator)")
    if r is not None:
        results.append(r)

    # C3: fiscal × trade_share continuous
    r = run_gls(dfw, "ca_gdp",
                 ["fiscal_bal_gdp", "trade_share", "fiscal_x_trade_share"]
                 + non_fiscal + demo_vars,
                 "C3: fiscal × trade_share (continuous)")
    if r is not None:
        results.append(r)

    # C4: Split by trade dominance
    for td, label in [(1, "trade-dominant"), (0, "nontrade-dominant")]:
        sub = dfw[dfw["trade_dominant"] == td].copy()
        r = run_gls(sub, "ca_gdp", ["fiscal_bal_gdp"] + non_fiscal + demo_vars,
                     f"C4: fiscal (subsample: {label})")
        if r is not None:
            results.append(r)

    # Write output
    lines = ["# Part C: Fiscal × Trade-Dominance Interaction", ""]
    lines.append("Referee §3.3 Option B: Tests whether fiscal coefficient is larger when trade")
    lines.append("dominates the CA, directly linking channel mismatch to fiscal leverage erosion.")
    lines.append("")

    if results:
        rdf = pd.concat(results, ignore_index=True)

        lines.append("**Table R4: Fiscal Coefficient by Trade Dominance**")
        lines.append("")
        lines.append("| Specification | fiscal_bal Coef | SE | p-value | Interaction Coef | Int p | N | R² |")
        lines.append("|---------------|----------------|----|---------|-----------------|-------|---|-----|")
        for m in rdf["model"].unique():
            sub = rdf[rdf["model"] == m]
            row0 = sub.iloc[0]
            frow = sub[sub["variable"] == "fiscal_bal_gdp"]
            if len(frow) == 0:
                continue
            frow = frow.iloc[0]
            fsig = stars(frow["p_value"])

            # Check for interaction
            int_coef, int_p = "", ""
            for iv in ["fiscal_x_trade_dom", "fiscal_x_trade_share"]:
                irow = sub[sub["variable"] == iv]
                if len(irow) > 0:
                    irow = irow.iloc[0]
                    isig = stars(irow["p_value"])
                    int_coef = f"{irow['coefficient']:.3f}{isig}"
                    int_p = f"{irow['p_value']:.3f}"

            lines.append(
                f"| {m} | {frow['coefficient']:.3f}{fsig} | "
                f"{frow['std_error']:.3f} | {frow['p_value']:.3f} | "
                f"{int_coef} | {int_p} | "
                f"{row0['n_obs']:,} | {row0['r_squared']:.3f} |")

    lines.append("")
    lines.append("*Notes: Trade-dominant = |Trade Balance/GDP| > median share of total |CA components|. "
                 "Panel GLS with entity and year FE, AR(1) correction.*")

    md_path = OUT_TABLES / "phase6_trade_dominance.md"
    md_path.write_text("\n".join(lines))
    print(f"\n  Output: {md_path}")
    return results


# ═══════════════════════════════════════════════════════════════════════════
# Part D: KAOPEN Restoration Probe
# ═══════════════════════════════════════════════════════════════════════════

def part_d_kaopen_restoration(df):
    """Probe whether KAOPEN partially restores fiscal leverage in aging economies."""
    print("\n" + "=" * 70)
    print("PART D: KAOPEN Restoration Probe")
    print("=" * 70)

    dfw = df.copy()

    demo_vars = ["Z_1", "Z_2", "Z_3"]
    non_fiscal = ["nfa_gdp_lag", "log_rel_opw", "kaopen"]
    non_fiscal = [c for c in non_fiscal if c in dfw.columns and dfw[c].notna().sum() > 200]

    # Create KAOPEN terciles (time-invariant, based on country mean)
    kaopen_means = dfw.groupby("iso3")["kaopen"].mean()
    kaopen_t1 = kaopen_means.quantile(1/3)
    kaopen_t2 = kaopen_means.quantile(2/3)
    kaopen_tercile_map = {}
    for iso3, val in kaopen_means.items():
        if val <= kaopen_t1:
            kaopen_tercile_map[iso3] = 1
        elif val <= kaopen_t2:
            kaopen_tercile_map[iso3] = 2
        else:
            kaopen_tercile_map[iso3] = 3
    dfw["kaopen_tercile"] = dfw["iso3"].map(kaopen_tercile_map)

    results = []
    factorial_results = []

    # D1: 2×3 factorial: demo_tercile × KAOPEN_tercile
    print("\n  --- 2×3 Factorial: Demo Tercile × KAOPEN Tercile ---")
    for dt in [1, 2, 3]:
        for kt in [1, 2, 3]:
            sub = dfw[(dfw["demo_tercile"] == dt) & (dfw["kaopen_tercile"] == kt)].copy()
            dt_label = {1: "Young", 2: "Mid", 3: "Old"}[dt]
            kt_label = {1: "Closed", 2: "Mid", 3: "Open"}[kt]
            label = f"D1: {dt_label} × {kt_label}"

            r = run_gls(sub, "ca_gdp", ["fiscal_bal_gdp"] + non_fiscal + demo_vars, label)
            if r is not None:
                results.append(r)
                fcoef, fp = get_coef(r, "fiscal_bal_gdp")
                factorial_results.append({
                    "Demo Tercile": dt_label,
                    "KAOPEN Tercile": kt_label,
                    "Fiscal Coef": fcoef,
                    "p-value": fp,
                    "N": r.iloc[0]["n_obs"],
                    "Countries": r.iloc[0]["n_countries"],
                    "R²": r.iloc[0]["r_squared"],
                })

    # D2: KAOPEN interaction within late-transition (old) tercile only
    print("\n  --- KAOPEN Interaction Within Late-Transition Tercile ---")
    late = dfw[dfw["demo_tercile"] == 3].copy()
    late["fiscal_x_kaopen"] = late["fiscal_bal_gdp"] * late["kaopen"]

    r = run_gls(late, "ca_gdp",
                 ["fiscal_bal_gdp", "fiscal_x_kaopen"] + non_fiscal + demo_vars,
                 "D2: fiscal×KAOPEN (late-transition only)")
    if r is not None:
        results.append(r)

    # D3: KAOPEN high vs low within late-transition
    for kt in [1, 3]:
        sub = late[late["kaopen_tercile"] == kt].copy()
        kt_label = {1: "Closed", 3: "Open"}[kt]
        r = run_gls(sub, "ca_gdp", ["fiscal_bal_gdp"] + non_fiscal + demo_vars,
                     f"D3: fiscal (old × {kt_label})")
        if r is not None:
            results.append(r)

    # D4: KAOPEN continuous interaction within each demographic tercile
    print("\n  --- KAOPEN Interaction Within Each Demo Tercile ---")
    for dt in [1, 2, 3]:
        sub = dfw[dfw["demo_tercile"] == dt].copy()
        sub["fiscal_x_kaopen"] = sub["fiscal_bal_gdp"] * sub["kaopen"]
        dt_label = {1: "Young", 2: "Mid", 3: "Old"}[dt]
        r = run_gls(sub, "ca_gdp",
                     ["fiscal_bal_gdp", "fiscal_x_kaopen"] + non_fiscal + demo_vars,
                     f"D4: fiscal×KAOPEN ({dt_label} tercile)")
        if r is not None:
            results.append(r)

    # Write output
    lines = ["# Part D: KAOPEN Restoration Probe", ""]
    lines.append("Probes whether financial openness partially restores fiscal leverage")
    lines.append("in aging economies, and how robust this finding is.")
    lines.append("")

    if factorial_results:
        lines.append("**Table R5: 3×3 Factorial — Fiscal Coefficient by Demo × KAOPEN Tercile**")
        lines.append("")
        lines.append("| Demo Tercile | KAOPEN Tercile | Fiscal Coef | p-value | N | Countries | R² |")
        lines.append("|-------------|----------------|-------------|---------|---|-----------|-----|")
        for fr in factorial_results:
            sig = stars(fr["p-value"]) if fr["p-value"] is not None else ""
            coef_str = f"{fr['Fiscal Coef']:.3f}{sig}" if fr["Fiscal Coef"] is not None else "—"
            p_str = f"{fr['p-value']:.3f}" if fr["p-value"] is not None else "—"
            lines.append(
                f"| {fr['Demo Tercile']} | {fr['KAOPEN Tercile']} | "
                f"{coef_str} | {p_str} | {fr['N']:,} | "
                f"{fr['Countries']} | {fr['R²']:.3f} |")

    lines.append("")

    if results:
        rdf = pd.concat(results, ignore_index=True)

        # D2 and D3 results
        lines.append("**Table R6: KAOPEN Interaction Within Late-Transition (Old) Tercile**")
        lines.append("")
        lines.append("| Specification | fiscal Coef | SE | p | Interaction | Int p | N | R² |")
        lines.append("|---------------|------------|-----|---|-------------|-------|---|----|")
        for m in rdf["model"].unique():
            if not m.startswith("D2") and not m.startswith("D3"):
                continue
            sub = rdf[rdf["model"] == m]
            row0 = sub.iloc[0]
            frow = sub[sub["variable"] == "fiscal_bal_gdp"]
            if len(frow) == 0:
                continue
            frow = frow.iloc[0]
            fsig = stars(frow["p_value"])

            int_str, int_p = "", ""
            irow = sub[sub["variable"] == "fiscal_x_kaopen"]
            if len(irow) > 0:
                irow = irow.iloc[0]
                isig = stars(irow["p_value"])
                int_str = f"{irow['coefficient']:.3f}{isig}"
                int_p = f"{irow['p_value']:.3f}"

            lines.append(
                f"| {m} | {frow['coefficient']:.3f}{fsig} | "
                f"{frow['std_error']:.3f} | {frow['p_value']:.3f} | "
                f"{int_str} | {int_p} | "
                f"{row0['n_obs']:,} | {row0['r_squared']:.3f} |")

        # D4 results
        lines.append("")
        lines.append("**Table R7: KAOPEN Interaction Within Each Demographic Tercile**")
        lines.append("")
        lines.append("| Tercile | fiscal Coef | SE | p | fiscal×KAOPEN | Int p | N | R² |")
        lines.append("|---------|------------|-----|---|---------------|-------|---|----|")
        for m in rdf["model"].unique():
            if not m.startswith("D4"):
                continue
            sub = rdf[rdf["model"] == m]
            row0 = sub.iloc[0]
            frow = sub[sub["variable"] == "fiscal_bal_gdp"]
            if len(frow) == 0:
                continue
            frow = frow.iloc[0]
            fsig = stars(frow["p_value"])

            irow = sub[sub["variable"] == "fiscal_x_kaopen"]
            int_str, int_p = "", ""
            if len(irow) > 0:
                irow = irow.iloc[0]
                isig = stars(irow["p_value"])
                int_str = f"{irow['coefficient']:.3f}{isig}"
                int_p = f"{irow['p_value']:.3f}"

            lines.append(
                f"| {m} | {frow['coefficient']:.3f}{fsig} | "
                f"{frow['std_error']:.3f} | {frow['p_value']:.3f} | "
                f"{int_str} | {int_p} | "
                f"{row0['n_obs']:,} | {row0['r_squared']:.3f} |")

    lines.append("")
    lines.append("*Notes: KAOPEN terciles based on country-mean openness (time-invariant). "
                 "Panel GLS with entity and year FE, AR(1) correction.*")

    md_path = OUT_TABLES / "phase6_kaopen_restoration.md"
    md_path.write_text("\n".join(lines))
    print(f"\n  Output: {md_path}")
    return results, factorial_results


# ═══════════════════════════════════════════════════════════════════════════
# Main
# ═══════════════════════════════════════════════════════════════════════════

def main():
    print("=" * 70)
    print("PHASE 6: Referee R1 Revisions — Paper A (Twin Deficits)")
    print("=" * 70)

    df = load_panel()
    print(f"  Panel: {df['iso3'].nunique()} countries, {len(df):,} obs, "
          f"{df['year'].min()}-{df['year'].max()}")

    part_a_winsorization(df)
    part_b_trade_share(df)
    part_c_fiscal_trade_dominance(df)
    part_d_kaopen_restoration(df)

    print("\n" + "=" * 70)
    print("PHASE 6 COMPLETE")
    print("=" * 70)
    print("  Output files:")
    for f in sorted(OUT_TABLES.glob("phase6_*.md")):
        print(f"    {f.name}")


if __name__ == "__main__":
    main()
